result <- df_orders_line_items |>
dplyr::full_join(df_products,
by = c("ProductID" = "ProductID")) |>
dplyr::full_join(df_category,
by = c("CategoryID" = "CategoryID")) |>
dplyr::mutate(subtotal = UnitPrice.x * Quantity * (1 - Discount)) |>
dplyr::group_by(OrderID, CategoryName) |>
dplyr::summarise(totalsale = sum(subtotal), .groups = 'drop') |>
dplyr::ungroup() |>
dplyr::group_by(CategoryName) |>
dplyr::summarise(avgsale = mean(totalsale), .groups = 'drop')
print(result)
## # A tibble: 8 × 2
## CategoryName avgsale
## <chr> <dbl>
## 1 Beverages 809.
## 2 Condiments 589.
## 3 Confections 600.
## 4 Dairy Products 829.
## 5 Grains/Cereals 553.
## 6 Meat/Poultry 1107.
## 7 Produce 816.
## 8 Seafood 487.
# Create ggplot object
ggplot_obj <- ggplot(result, aes(x = CategoryName, y = avgsale, fill = CategoryName)) +
geom_col() +
theme(axis.text.y = element_blank(), axis.title.y = element_blank()) +
theme_minimal() # You can customize the theme as needed
# Create plotly object and adjust size
plotly_obj <- plotly::ggplotly(ggplot_obj, height = 400, width = 950)
# Show the plotly object
plotly_obj
df_orders |>
dplyr::select(OrderID,OrderDate,)|>
dplyr::full_join(df_orders_line_items|>
dplyr::select( OrderID ,ProductID, UnitPrice, Quantity),
by = c("OrderID" = "OrderID")) |>
dplyr::full_join(df_products|>
dplyr::select( CategoryID ,ProductID),
by = c("ProductID" = "ProductID")) |>
dplyr::full_join(df_category|>
dplyr::select( CategoryID ,CategoryName),
by = c("CategoryID" = "CategoryID")) |>
dplyr::mutate(subtotal = UnitPrice * Quantity,
Quarter = quarter(as.Date(OrderDate))) |>
dplyr::group_by(Quarter, CategoryName) |>
dplyr::summarize(total_sales = sum(subtotal))|>
plot_ly(x = ~Quarter, y = ~total_sales, type = 'bar', color = ~CategoryName) %>%
layout(title = "Total Sales by Category and Quarter",
xaxis = list(title = "Quarter"),
yaxis = list(title = "Total Sales"),
barmode = 'stack')
## `summarise()` has grouped output by 'Quarter'. You can override using the
## `.groups` argument.
supplier_revenue <- df_orders_line_items |>
dplyr::full_join(df_products,
by = c("ProductID" = "ProductID")) |>
dplyr::full_join(df_suppliers,
by = c("SupplierID" = "SupplierID"))|>
dplyr::mutate(subtotal = UnitPrice.x * Quantity * (1- Discount)) |>
dplyr::group_by(CompanyName)|>
dplyr::summarise(totalsale= sum(subtotal ,na.rm = TRUE))
print(supplier_revenue)
## # A tibble: 29 × 2
## CompanyName totalsale
## <chr> <dbl>
## 1 Aux joyeux ecclsiastiques 163135
## 2 Bigfoot Breweries 23777.
## 3 Cooperativa de Quesos 'Las Cabras' 26769.
## 4 Escargots Nouveaux 6665.
## 5 Exotic Liquids 35917.
## 6 Formaggi Fortini s.r.l. 51082.
## 7 Forts d'rables 66267.
## 8 G'day, Mate 69637.
## 9 Gai pturage 126582
## 10 Grandma Kelly's Homestead 43569
## # ℹ 19 more rows
ggplot_obje <- ggplot(supplier_revenue, aes(y = CompanyName, x = totalsale, fill = CompanyName)) +
geom_col() +
theme(axis.text.y = element_blank(), axis.title.y = element_blank()) +
theme_minimal() # You can customize the theme as needed
# Create plotly object and adjust size
plotly_obje <- plotly::ggplotly(ggplot_obje, height = 400, width = 950)
# Show the plotly object
plotly_obje
supplier_product_categories <- df_category|>
dplyr::full_join(df_products,
by = c("CategoryID" = "CategoryID")) |>
dplyr::full_join(df_suppliers,
by = c("SupplierID" = "SupplierID"))
supplier_category_counts <- supplier_product_categories %>%
group_by(CompanyName, CategoryName) %>%
summarise(NumProducts = n()) %>%
arrange(CompanyName, desc(NumProducts))
## `summarise()` has grouped output by 'CompanyName'. You can override using the
## `.groups` argument.
# Print the resulting dataset
print(supplier_category_counts)
## # A tibble: 49 × 3
## # Groups: CompanyName [29]
## CompanyName CategoryName NumProducts
## <chr> <chr> <int>
## 1 Aux joyeux ecclsiastiques Beverages 2
## 2 Bigfoot Breweries Beverages 3
## 3 Cooperativa de Quesos 'Las Cabras' Dairy Products 2
## 4 Escargots Nouveaux Seafood 1
## 5 Exotic Liquids Beverages 2
## 6 Exotic Liquids Condiments 1
## 7 Formaggi Fortini s.r.l. Dairy Products 3
## 8 Forts d'rables Condiments 1
## 9 Forts d'rables Confections 1
## 10 G'day, Mate Grains/Cereals 1
## # ℹ 39 more rows
interactive_plot <- ggplot(supplier_category_counts, aes(y = CompanyName, x = NumProducts, fill = CategoryName, text = paste("Category: ", CategoryName, "<br>Num Products: ", NumProducts))) +
geom_bar(stat = "identity", position = "stack") +
labs(title = "Number of Products in Each Category by Supplier",
x = "Number of Products",
y = "Supplier Name",
fill = "Category Name") +
theme_minimal()
# Convert ggplot to plotly with tooltips
interactive_plot <- ggplotly(interactive_plot, tooltip = "text")
# Show the interactive plot
interactive_plot
product_revenue <- df_orders |>
dplyr::select(OrderID, OrderDate) |>
dplyr::full_join(df_orders_line_items,
by = c("OrderID" = "OrderID")) |>
dplyr::full_join(df_products,
by = c("ProductID" = "ProductID")) |>
dplyr::full_join(df_category,
by = c("CategoryID" = "CategoryID")) |>
dplyr::mutate(subtotal = UnitPrice.x * Quantity * (1 - Discount),
Year = lubridate::year(as.Date(OrderDate))) |>
dplyr::group_by(Year, ProductName, .drop = TRUE) |>
dplyr::summarize(total_sales = sum(subtotal), .groups = 'drop')
print(product_revenue)
## # A tibble: 227 × 3
## Year ProductName total_sales
## <dbl> <chr> <dbl>
## 1 1996 Alice Mutton 7301.
## 2 1996 Aniseed Syrup 240
## 3 1996 Boston Crab Meat 2999.
## 4 1996 Camembert Pierrot 10064
## 5 1996 Carnarvon Tigers 5300
## 6 1996 Chai 1800
## 7 1996 Chang 3435.
## 8 1996 Chartreuse verte 3830.
## 9 1996 Chef Anton's Cajun Seasoning 1883.
## 10 1996 Chef Anton's Gumbo Mix 2193
## # ℹ 217 more rows
ggplot_obj <- ggplot(product_revenue, aes(y = as.factor(Year), x = total_sales, fill = ProductName)) +
geom_col() +
theme(axis.text.y = element_blank(), axis.title.y = element_blank()) +
theme_minimal()
plotly_obj <- plotly::ggplotly(ggplot_obj, height = 400, width = 950)
plotly_obj
product_sales <- df_orders_line_items |>
dplyr::full_join(df_products,
by = c("ProductID" = "ProductID")) |>
dplyr::mutate(subtotal = UnitPrice.x * Quantity * (1- Discount)) |>
dplyr::group_by(ProductName)|>
dplyr::summarise(totalsale= sum(subtotal ,na.rm = TRUE),
total_quantity = sum(Quantity, na.rm = TRUE))|>
dplyr::arrange(desc(totalsale))
top_10_products <- product_sales[1:10, ]
print(top_10_products)
## # A tibble: 10 × 3
## ProductName totalsale total_quantity
## <chr> <dbl> <int>
## 1 Cte de Blaye 149984. 623
## 2 Thringer Rostbratwurst 87736. 746
## 3 Raclette Courdavault 76296 1496
## 4 Camembert Pierrot 50286 1577
## 5 Tarte au sucre 49828. 1083
## 6 Gnocchi di nonna Alice 45121. 1263
## 7 Manjimup Dried Apples 44743. 886
## 8 Alice Mutton 35482. 978
## 9 Carnarvon Tigers 31988. 539
## 10 Rssle Sauerkraut 26866. 640
country_avg_freight <- df_orders |>
group_by(ShipCountry) |>
summarise(AvgFreight = mean(Freight, na.rm = TRUE))
print(country_avg_freight)
## # A tibble: 21 × 2
## ShipCountry AvgFreight
## <chr> <dbl>
## 1 Argentina 37.4
## 2 Austria 185.
## 3 Belgium 67.4
## 4 Brazil 58.8
## 5 Canada 73.3
## 6 Denmark 77.6
## 7 Finland 41.4
## 8 France 55.0
## 9 Germany 92.5
## 10 Ireland 145.
## # ℹ 11 more rows
ggplot_obj <- ggplot(country_avg_freight, aes(x = ShipCountry, y = AvgFreight)) +
geom_col(fill = "skyblue") +
theme(axis.text.y = element_blank(), axis.title.y = element_blank()) +
theme_minimal()
plotly_obj <- plotly::ggplotly(ggplot_obj, height = 400, width = 1100)
plotly_obj
freight_summary <- df_orders |>
summarise(
AvgFreight = mean(Freight, na.rm = TRUE),
MinFreight = min(Freight, na.rm = TRUE),
MaxFreight = max(Freight, na.rm = TRUE)
)
# Print the result
print(freight_summary)
## AvgFreight MinFreight MaxFreight
## 1 78.2442 0.02 1007.64
df_orders$OrderDate <- ymd_hms(df_orders$OrderDate)
df_orders$Month <- format(df_orders$OrderDate, "%Y-%m")
df_orders$Year <- format(df_orders$OrderDate, "%Y")
monthly_analysis <- df_orders %>%
group_by(Month) %>%
summarise(AvgFreight = mean(Freight, na.rm = TRUE))
print(head(monthly_analysis))
## # A tibble: 6 × 2
## Month AvgFreight
## <chr> <dbl>
## 1 1996-07 58.6
## 2 1996-08 55.9
## 3 1996-09 48.8
## 4 1996-10 58.5
## 5 1996-11 86.1
## 6 1996-12 90.3
ggplot(monthly_analysis, aes(x = Month, y = AvgFreight, group=1)) +
geom_line(color = "red") +
geom_point(color = "red") +
labs(title = "Average Freight Costs Over Months",
x = "Month",
y = "Average Freight Cost") +
theme_minimal() +
scale_y_continuous(labels = scales::dollar_format()) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
yearly_analysis <- df_orders %>%
group_by(Year) %>%
summarise(AvgFreight = mean(Freight, na.rm = TRUE))
print(head(yearly_analysis))
## # A tibble: 3 × 2
## Year AvgFreight
## <chr> <dbl>
## 1 1996 67.6
## 2 1997 79.6
## 3 1998 82.2
ggplot(yearly_analysis, aes(x = Year, y = AvgFreight , group=1)) +
geom_line(color = "blue") +
geom_point(color = "blue") +
labs(title = "Average Freight Costs Over Year",
x = "Year",
y = "Average Freight Cost") +
theme_minimal() +
scale_y_continuous(labels = scales::dollar_format()) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
df_orders$OrderDate <- as.Date(df_orders$OrderDate)
employee_data <- df_employes %>%
full_join(df_orders, by = c("EmployeeID" = "EmployeeID")) %>%
full_join(df_orders_line_items, by = c("OrderID" = "OrderID")) %>%
mutate(subtotal = UnitPrice * Quantity * (1 - Discount))
employee_sales <- employee_data %>%
select(OrderDate, FirstName, subtotal) %>%
tidyr::complete(
OrderDate = seq(from = min(df_orders$OrderDate), to = max(df_orders$OrderDate), by = "days"),
FirstName = unique(FirstName),
fill = list(subtotal = 0)
)
rolling_employee_data <- employee_sales %>%
arrange(FirstName, OrderDate) %>%
group_by(FirstName) %>%
mutate(sales_past_30d = zoo::rollapplyr(subtotal, width = 30, FUN = sum, fill = NA, align = "right", partial = TRUE))
print(rolling_employee_data)
## # A tibble: 7,435 × 4
## # Groups: FirstName [9]
## OrderDate FirstName subtotal sales_past_30d
## <date> <chr> <dbl> <dbl>
## 1 1996-07-04 Andrew 0 0
## 2 1996-07-05 Andrew 0 0
## 3 1996-07-06 Andrew 0 0
## 4 1996-07-07 Andrew 0 0
## 5 1996-07-08 Andrew 0 0
## 6 1996-07-09 Andrew 0 0
## 7 1996-07-10 Andrew 0 0
## 8 1996-07-11 Andrew 0 0
## 9 1996-07-12 Andrew 0 0
## 10 1996-07-13 Andrew 0 0
## # ℹ 7,425 more rows
# Your ggplot code with OrderDate included in the text aesthetic
gg <- ggplot(rolling_employee_data, aes(x = OrderDate, y = sales_past_30d, color = FirstName, group = FirstName, text = paste("Order Date: ", OrderDate, "<br>Sales: ", scales::dollar(sales_past_30d, digits = 3)))) +
geom_line() +
labs(title = "Rolling Past 30 Days Sales",
y = "Sales (in dollars)") +
facet_wrap(~FirstName, scales = "free_y", ncol = 2, drop = FALSE, strip.position = "bottom") +
theme_minimal() +
theme(axis.text.x = element_blank()) # Hide x-axis labels
# Convert ggplot to plotly with increased size
interactive_plot <- ggplotly(gg, tooltip = "text", height = 900, width = 900)
# Print or display the interactive plot
interactive_plot
# Check for missing values in OrderDate
if (sum(is.na(df_orders$OrderDate)) > 0) {
stop("Missing values in OrderDate column. Handle them appropriately.")
}
df_result <- df_orders %>%
full_join(df_orders_line_items %>%
select(OrderID, ProductID, UnitPrice, Quantity),
by = "OrderID") %>%
full_join(df_products %>%
select(ProductID, ProductName, CategoryID),
by = "ProductID") %>%
full_join(df_category %>%
select(CategoryID, CategoryName),
by = "CategoryID") %>%
mutate(subtotal = UnitPrice * Quantity) %>%
select(OrderDate, ProductName, subtotal) %>%
tidyr::complete(OrderDate = seq(from = min(df_orders$OrderDate), to = max(df_orders$OrderDate), by = "days"),ProductName = unique(ProductName),fill = list(subtotal = 0))
rool_data <- df_result|>
arrange(ProductName ,OrderDate)|>
group_by(ProductName) |>
mutate(sales_past_30d = zoo::rollapplyr(subtotal, width = 30, FUN = sum, fill = NA, align = "right", partial = TRUE))
print(rool_data)
## # A tibble: 51,793 × 4
## # Groups: ProductName [77]
## OrderDate ProductName subtotal sales_past_30d
## <date> <chr> <dbl> <dbl>
## 1 1996-07-04 Alice Mutton 0 0
## 2 1996-07-05 Alice Mutton 0 0
## 3 1996-07-06 Alice Mutton 0 0
## 4 1996-07-07 Alice Mutton 0 0
## 5 1996-07-08 Alice Mutton 0 0
## 6 1996-07-09 Alice Mutton 0 0
## 7 1996-07-10 Alice Mutton 0 0
## 8 1996-07-11 Alice Mutton 0 0
## 9 1996-07-12 Alice Mutton 0 0
## 10 1996-07-13 Alice Mutton 0 0
## # ℹ 51,783 more rows
shinyApp(
ui = fluidPage(
titlePanel("Dynamic Line Plot"),
sidebarLayout(
sidebarPanel(
selectInput("product_selector", "Select Products:", choices = unique(rool_data$ProductName))
),
mainPanel(
plotlyOutput("line_plot")
)
)
),
server = function(input, output) {
# Filter data based on selected employee
selected_data <- reactive({
rool_data[rool_data$ProductName == input$product_selector, ]
})
# Render the dynamic plot
output$line_plot <- renderPlotly({
plot_ly(data = selected_data(), x = ~OrderDate, y = ~sales_past_30d,
text = ~paste("ProductName: ", ProductName, "<br>Date: ", OrderDate, "<br>Sales: ", scales::dollar(sales_past_30d, digits = 3)),
type = "scatter", mode = "lines",
width = 1200, height = 600) %>%
layout(title = paste("Rolling Past 30 Days Sales for", input$product_selector),
xaxis = list(title = "Order Date"),
yaxis = list(title = "Sales (in dollars)"),
showlegend = TRUE,
hovermode = "closest"
)
})
}
)
df_total_sale <- df_orders_line_items |>
dplyr::full_join(df_orders, by = c("OrderID" = "OrderID")) |>
dplyr::mutate(subtotal = UnitPrice * Quantity * (1 - Discount)) |>
dplyr::group_by(ShipCity, ShipCountry) |>
dplyr::summarize(city_sales = sum(subtotal)) |>
dplyr::group_by(ShipCountry) |>
dplyr::mutate(country_sales = sum(city_sales)) |>
dplyr::mutate(p_country_sales = round(city_sales / country_sales, 2))
print(df_total_sale)
## # A tibble: 70 × 5
## # Groups: ShipCountry [21]
## ShipCity ShipCountry city_sales country_sales p_country_sales
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Aachen Germany 3763. 244641. 0.02
## 2 Albuquerque USA 52246. 263567. 0.2
## 3 Anchorage USA 16325. 263567. 0.06
## 4 Barcelona Spain 837. 19432. 0.04
## 5 Barquisimeto Venezuela 17825. 60815. 0.29
## 6 Bergamo Italy 7604. 16705. 0.46
## 7 Berlin Germany 4596. 244641. 0.02
## 8 Bern Switzerland 12886. 32920. 0.39
## 9 Boise USA 115673. 263567. 0.44
## 10 Brandenburg Germany 31746. 244641. 0.13
## # ℹ 60 more rows
# Create ggplot object
gg_plot <- ggplot(df_total_sale, aes(x = ShipCountry, y = p_country_sales, fill = ShipCity)) +
geom_bar(stat = "identity")
# Convert to plotly
plotly_plot <- plotly::ggplotly(gg_plot)
# Modify layout to increase width
layout(plotly_plot, width = 1200)